July 30, 2015

Data aquisition

  • Tracks comes from ENCODE project and were downloaded from GEO
  • For stronger signal replicates are combined using "bigWigMerge" tool
  • Combined tracks are z-scored using bash script with mean and sd calculated by "bigWigInfo" tool"

Example of the data pre-processing;

bigWigMerge GSM803485_hg19_wgEncodeHaibTfbsGm12878Pol24h8Pcr1xRawRep2.bigWig \
GSM803485_hg19_wgEncodeHaibTfbsGm12878Pol24h8Pcr1xRawRep1.bigWig \
GSM803355_hg19_wgEncodeHaibTfbsGm12878Pol2Pcr2xRawRep2.bigWig \
GSM803355_hg19_wgEncodeHaibTfbsGm12878Pol2Pcr2xRawRep1.bigWig merge.bedGraph

bedGraphToBigWig merge.bedGraph hg19.chrom.sizes merge.bw

mi=$(bigWigInfo merge.bw | grep mean | sed -n 's/mean: //pg')
sd=$(bigWigInfo merge.bw | grep std | sed -n 's/std: //pg')
cat merge.bedGraph | awk -v mi=$mi -v sd=$sd '{$4=($4-mi)/sd;print}' > zsc.bedGraph    
bedGraphToBigWig zsc.bedGraph hg19.chrom.sizes Pol2_bigWigMerge_zsc.bw

rm merge* zsc.bedGraph

Data aquisition

  • Tracks comes from ENCODE project and were downloaded from GEO
  • For stronger signal replicates are combined using "bigWigMerge" tool
  • Combined tracks are z-scored using bash script with mean and sd calculated by "bigWigInfo" tool"

Example of the data pre-processing;

bigWigMerge GSM803485_hg19_wgEncodeHaibTfbsGm12878Pol24h8Pcr1xRawRep2.bigWig \
GSM803485_hg19_wgEncodeHaibTfbsGm12878Pol24h8Pcr1xRawRep1.bigWig \
GSM803355_hg19_wgEncodeHaibTfbsGm12878Pol2Pcr2xRawRep2.bigWig \
GSM803355_hg19_wgEncodeHaibTfbsGm12878Pol2Pcr2xRawRep1.bigWig merge.bedGraph

bedGraphToBigWig merge.bedGraph hg19.chrom.sizes merge.bw

mi=$(bigWigInfo merge.bw | grep mean | sed -n 's/mean: //pg')
sd=$(bigWigInfo merge.bw | grep std | sed -n 's/std: //pg')
cat merge.bedGraph | awk -v mi=$mi -v sd=$sd '{$4=($4-mi)/sd;print}' > zsc.bedGraph    
bedGraphToBigWig zsc.bedGraph hg19.chrom.sizes Pol2_bigWigMerge_zsc.bw

rm merge* zsc.bedGraph

Loading data and libraries

require(seqplots)
require(magrittr)

source('fn_plotTopBar.R'); plotTopBar <- plotTopBar
devtools::load_all("/Users/przemol/code/seqplotsR")

tracks <- dir('tracks', pattern = 'bw', full.names = TRUE)
features <- dir('features', pattern = 'bed', full.names = TRUE)

tracks
## [1] "tracks/H3k4me3_bigWigMerge_zsc.bw" "tracks/MNase_bigWigMerge_zsc.bw"  
## [3] "tracks/Pol2_bigWigMerge_zsc.bw"
features
## [1] "features/TSS_band1.bed" "features/TSS_band2.bed"
## [3] "features/TSS_band3.bed" "features/TSS_band4.bed"
## [5] "features/TSS_band5.bed"

Data process

m1 <- getPlotSetArray(
    tracks = tracks,
    features = grep('band1', features, value = TRUE),
    refgenome = 'hg19',
    bin = 10L, xmin = 1500L, xmax = 3000L
)

m2 <- getPlotSetArray(
    tracks = grep('H3K4me3', tracks, value = TRUE, ignore.case = TRUE),
    features = grep('1|3|5', features, value = TRUE),
    refgenome = 'hg19',
    bin = 10L, xmin = 1500L, xmax = 3000L
)

ms <- MotifSetup()
ms$addBigWig(tracks[2])$addBigWig(tracks[3])$addBigWig(tracks[1])
## MotifSetup with 3 motifs/tracks.
ms$addMotif('CG', revcomp = FALSE, name = "CpG")
## MotifSetup with 4 motifs/tracks.
M <- getPlotSetArray(
    tracks = ms,
    features = features[1],
    refgenome = 'hg19',
    bin = 10L, xmin = 1000L, xmax = 1500L
)

Figures

Plot A

plotAverage(
    m1, labels = colnames(m1$as.array())  %>% strsplit('_')  %>% sapply('[[', 1), 
    legend_pos = 'topleft', legend_ext = TRUE, legend_ext_pos = 'topright',
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22,
    main = "Top expression quintile", xlab = "TSS", ylab = "Z-scored ChIP signal",
    plotScale = "", #'zscore', 'log2'
)

dev.print(pdf, file='fa2.pdf'); dev.print(png, file='fa2.png', height=8.27, width=11.7, res=72, units = "in");
## pdf 
##   2
## pdf 
##   2

Plot B

plotAverage(
    m2, labels = c("TSS_top_20%", "TSS_middle_20%", "TSS_bottom_20%"), 
    legend_pos = 'topleft', legend_ext = TRUE, legend_ext_pos = 'topright',
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22,
    main = "Top expression quintile", xlab = "TSS", ylab = "Z-scored ChIP signal"
)

dev.print(pdf, file='fb2.pdf'); dev.print(png, file='fb2.png', height=8.27, width=11.7, res=72, units = "in");
## pdf 
##   2
## pdf 
##   2

Plot C

source( 'fn_plotTopBar.R' ); plotTopBar <- plotTopBar
plotTopBar(M, M$as.array()  %>% colnames %>% strsplit('_') %>% sapply('[[', 1))

dev.print(pdf, file='fc.pdf'); dev.print(png, file='fc.png', height=2*4, width=6*4, res=72, units = "in");
## pdf 
##   2
## pdf 
##   2

Plot D

cls <- plotHeatmap(
    M, labels = colnames(M$as.array())  %>% strsplit('_')  %>% sapply('[[', 1),
    raster = TRUE, include = c(F,F,T,F), sortrows = 'decreasing', clusters = 3,
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22, 
    colvec = list(c('white', 'black', 'black'), NA, NA, c('white', 'darkgreen', 'darkgreen')),
    #o_min = c(-1, -2, -1, 8),
    #o_max = c(3, 8, 4, 16)
)

dev.print(pdf, file='fd.pdf'); dev.print(png, file='fd.png', height=8.27, width=11.7, res=72, units = "in");
## quartz_off_screen 
##                 2
## quartz_off_screen 
##                 2

Plot E

export.bed(import.bed(features[1])[cls$ClusterID==1], 'flt.bed')
MF <- getPlotSetArray(
    tracks = ms,
    features = 'flt.bed',
    refgenome = 'hg19',
    bin = 10L, xmin = 1000L, xmax = 1500L
)
plotTopBar(MF, MF$as.array()  %>% colnames %>% strsplit('_') %>% sapply('[[', 1) )

dev.print(pdf, file='fe.pdf'); dev.print(png, file='fe.png', height=2*4, width=6*4, res=72, units = "in");
## pdf 
##   2
## pdf 
##   2

Plot F

plotHeatmap(
    MF, labels = M$as.array()  %>% colnames %>% strsplit('_') %>% sapply('[[', 1),
    raster = TRUE, include = c(F,T,T,F), sortrows = 'decreasing',
    clstmethod = 'ssom', ssomt1 = 2, ssomt2 = 2, clusters = 3,
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22,
    #clspace = c('darkblue', 'white', 'darkred'),
    colvec = list(NA, NA, NA, c('white', 'darkgreen', 'darkgreen')),
    #o_min = c(-1, -2, -1, 8), #o_max = c(3, 8, 4, 16)
)

dev.print(pdf, file='ff.pdf'); dev.print(png, file='ff.png', height=8.27, width=11.7, res=72, units = "in");
## quartz_off_screen 
##                 2
## quartz_off_screen 
##                 2

Assembled figure (click for full page)

Alternative figures

Plot A

plotAverage(
    m1, labels = colnames(m1$as.array())  %>% strsplit('_')  %>% sapply('[[', 1), 
    legend_pos = 'topleft', legend_ext = TRUE, legend_ext_pos = 'topright',
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22,
    main = "Top expression quintile", xlab = "TSS", ylab = "Z-scored ChIP signal",
    plotScale = "zscore", #'zscore', 'log2'
)

dev.print(pdf, file='fa2lz.pdf')
## pdf 
##   2